package com.space.netspider;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Spider;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;


/**
 * Command-line entry point that crawls lawyer listings region by region.
 *
 * <p>For every key in {@code LawyerUrlInfo.allAreLawyerInfos} a start URL and a
 * page-URL pattern are assembled, a WebMagic {@link Spider} is run with a
 * {@code TelProcessor}, and the collected beans are handed to
 * {@code WriteExcel.write} together with the value mapped to that key.
 *
 * <p>Created 2021/6/9 17:24.
 *
 * @author yangyf
 * @version 1.0
 */
public class NetSpiderMain {
    private static final Logger logger = LoggerFactory.getLogger(NetSpiderMain.class);

    /**
     * Sample listing URL. Not read inside this class ({@link #main} builds its
     * own start URL per region); kept public and mutable for existing callers.
     */
    public static String URL = "https://www.64365.com/shanxi1/lawyer/page_1.aspx";

    /** Destination path passed to {@code WriteExcel.write}. */
    public static String OUT_PATH = "C:\\Users\\Administrator\\Desktop\\lawyer.xls";

    /**
     * Shared result buffer handed to each {@code TelProcessor}. Wrapped in a
     * synchronized list — presumably because the processor appends to it from
     * the spider's worker threads (verify against {@code TelProcessor}).
     */
    public static List<LawyerBean> lawyerBeanList = Collections.synchronizedList(new ArrayList<>());

    /**
     * Crawls every configured region in turn and writes each region's results.
     *
     * <p>A failure while processing one region is logged and does not prevent
     * the remaining regions from being processed.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        for (String key : LawyerUrlInfo.allAreLawyerInfos.keySet()) {
            try {
                String val = LawyerUrlInfo.allAreLawyerInfos.get(key);
                // Entry URL of the site section to crawl for this region.
                String startUrl = LawyerUrlInfo.prefix + key + LawyerUrlInfo.start_url_suffix;
                // Pattern string given to the processor; assembled from the page prefix/suffix.
                String pageUrlRegex = LawyerUrlInfo.page_prefix + key + LawyerUrlInfo.page_suffix;

                TelProcessor telProcessor = new TelProcessor(lawyerBeanList, pageUrlRegex);
                Spider.create(telProcessor).addUrl(startUrl).thread(5).run();
                WriteExcel.write(OUT_PATH, val, lawyerBeanList);
            } catch (Exception e) {
                // Log through SLF4J (with the stack trace) instead of printStackTrace().
                logger.error("Failed to crawl or export lawyers for region key '{}'", key, e);
            } finally {
                // Always reset the shared buffer so a failed region cannot leak
                // its partial results into the next region's output.
                lawyerBeanList.clear();
            }
        }
    }
}
